********************************************************************************
* matching_firm_diff_market.do
* Purpose: ROBUSTNESS CHECK - Firm PSM requiring the two firms in a matched
*          pair to operate in DIFFERENT markets (CZ x NAICS cells; no overlap).
*
* Difference from baseline (matching_firm.do):
*   - For each treated firm in market m (CZ x NAICS), control candidates must
*     NOT be in market m. The matching loop iterates over treated markets and
*     restricts controls to firms NOT in that market.
*   - This ensures the control firm is not directly affected by the same M&A
*     event through product/labor market channels.
*
* Key structural difference: matching is done within a loop over markets,
* not at the full-sample level. Each treated firm is matched to controls
* from outside its own CZ x NAICS market.
*
* Output: $data/firm_matched_diff_market.dta
*         $data/firm_matched_diff_market_list.dta
********************************************************************************
* First and last pre-event years looped over by the matching step below
global start_year 2004
global end_year   2016

* For each pre-event year y: build the eligible sample, estimate a score,
* match every treated firm to a control from OUTSIDE its own market, and
* save the list of matched firms (entid_syn, treated, pairid, year_prior).
forvalues y = $start_year/$end_year {
	* load the gtools internal Mata file (gquantiles below is a gtools command)
	run "Z:\VRDC-PROJ-6730\Moon_6730\ado\_\_gtools_internal.mata"
	
	* clear is the documented option of use and matches the later use call in this file
	use $data/firm_`y', clear

	** drop firms that have less than 10 employees in year y **
	keep if PD7_AvgEmp_NonZero >= 10

	** drop firms with missing values for the match vars **
	drop if mi(naics) | mi(total_revenue) | mi(age) | mi(avg_wage)
	
	** treated = firm whose M&A deal year is the following year (y + 1) **
	gen treated		= (DEAL_YEAR == `y' + 1)
	
	** drop M&A firms that are not treated in this year ** 
	drop if MnA_firm == 1 & treated == 0
	
	gsort -treated
	
	* create average wage, revenue, and age bins: 10 bins for avg_wage, 15 otherwise.
	* The macro NAME is compared as a string. An unquoted comparison would instead
	* compare the first-observation VALUES of the two variables.
	foreach var in avg_wage total_revenue age {
	
		if "`var'" == "avg_wage" {
			gquantiles 	bin_`var' = `var', xtile n(10)
		}
		else {
			gquantiles 	bin_`var' = `var', xtile n(15)
		}
	}
	* create interactions between bins
	* firms must be matched within the same cell
	egen cell = group(naics2 OPAddressProvince bin_*)
	drop if cell == .
		
	* estimate the score with a linear model on the eligible sample
	reg treated c.total_revenue##c.total_revenue c.age##c.age c.avg_wage##c.avg_wage
	
	* store the fitted value in double precision: the cell offset added below can be
	* large, and a float would then lose the fitted-value differences used for ranking
	predict double pscore

	* shift each cell by 10 so that, with caliper(1) below, matches stay within a cell
	* (assumes fitted values within a cell differ by far less than the 10-unit gap)
	replace pscore = pscore + 10.0*cell
	
	* placeholder outcome passed to psmatch2; it is not kept in the output
	gen outcome = rnormal()
	
	** we sort the data for replication
	gsort entid_syn
	
	gen		id			=	.
	gen		matched_id	=	.

	* Define market as CZ (sac_syn) x NAICS industry
	egen market = group(sac_syn naics)

	preserve
	* Loop over each treated market: for each treated firm in market m,
	* controls must come from firms NOT in market m (different-market requirement)
	levelsof market if treated == 1, local(ts)
	foreach mrkt in `ts' {
		* start every market from the full eligible sample
		restore, preserve

		* Keep treated firms in THIS market + control firms from ALL OTHER markets
		keep if (market == `mrkt' & treated == 1) | (market ~= `mrkt' & treated == 0)
		
		// Matching //
		* best effort: if psmatch2 fails for a market, id stays missing and
		* the market contributes no pairs
		cap psmatch2 treated, outcome(outcome) pscore(pscore) caliper(1) n(1) noreplacement
		
		* both members of a pair get the same id: the control carries its own _id and
		* the treated firm carries _n1 (per the psmatch2 help, the _id of its match)
		cap replace	 id = _id if treated==0 & _weight==1
		cap replace	 id = _n1 if treated==1 & _weight==1
		* matched_id records the treated market, so id is only unique within a market
		replace 	matched_id	=	`mrkt'
		drop if mi(id)
		
		save $data/firms_in_market_`mrkt', replace	
	}
	restore
	
	* stack the per-market results and remove the intermediate files
	clear 
	foreach mrkt in `ts' {
	
		append	using $data/firms_in_market_`mrkt', force
		erase $data/firms_in_market_`mrkt'.dta
	}
	
	* pair identifier, unique within year y
	egen pairid = group(id matched_id)
	sort pairid
	
	keep entid_syn treated pairid
	
	gen year_prior = `y'
	
	compress
	save $data/firm_matched_diff_market_list_`y'.dta,  replace

}

* Stack the yearly pair lists into a single list file
drop _all
forvalues yr = $start_year/$end_year {
	local pairs_file $data/firm_matched_diff_market_list_`yr'
	append using `pairs_file', keep(entid_syn pairid treated year_prior) force
}
save $data/firm_matched_diff_market_list, replace

* For every firm-year file 2001-2017, keep only firms that appear in the pair
* list. A firm can appear in the list several times, hence the 1:m merge.
forvalues yr = 2001/2017 {
	use $data/firm_`yr', clear
	merge 1:m entid_syn using $data/firm_matched_diff_market_list, keep(match) nogenerate
	save $data/firm_matched_diff_market_`yr', replace
}

* Stack the yearly matched files into one panel and delete the yearly pieces
drop _all
forvalues yr = 2001/2017 {
	append using $data/firm_matched_diff_market_`yr', force
	erase $data/firm_matched_diff_market_`yr'.dta
}

save $data/firm_matched_diff_market_intermid, replace

gsort pairid year_prior -treated year

**# Matched Firm Panel
* Steps from here on: create id and deal type variables, define control
* variables such as sector and age, and drop financial sectors.
egen firm_id = group(entid_syn)
* one id per firm x matching year
egen id = group(firm_id year_prior)

* keep the deal variables only on the pre-event row (year equal to year_prior)
foreach v of varlist DEAL Acquirer DEAL_YEAR {
	replace `v' = . if year != year_prior
}

**# Time Variables
* event time relative to year_prior + 1, with the tails pooled at -6 and +6
gen 	t = year - (year_prior + 1)
replace t = 6 	if t >  5 	& ~mi(t)
replace t = -6 	if t < -5 	& ~mi(t)
* frequency table of event time for the log
tab t
* One dummy per event time: ds_1 is t = -6, ..., ds_13 is t = 6.
* Each dummy is built from its event-time VALUE rather than from the rank of the
* observed levels, so names and labels stay correct even when an event time has
* no observations.
forvalues i = 1/13 {
	local temp = `i' - 7
	gen byte ds_`i' = (t == `temp') if ~mi(t)
	label variable ds_`i' "`temp'"
}
* ds_6 is t = -1; it is zeroed out, presumably so that t = -1 is the reference period
replace ds_6 = 0

** sector -- missing NAICS codes are filled with the firm-level mode (smallest mode on ties)
drop naics2
egen	naics_mode	= mode(naics), by(entid_syn) minmode
replace naics		= naics_mode	if mi(naics)
gen 	naics2 		= int(naics/100)
* pool 2-digit codes: 32 and 33 into 31, 45 into 44, 49 into 48, and 56, 61, 62 into 54
recode naics2 (32 33 = 31) (45 = 44) (49 = 48) (56 61 62 = 54)

* sector of the treated firm in the pre-event year, spread to the whole pair
gen 	naics2_event	= cond(treated == 1 & year == year_prior, naics2, .)
gegen 	matched_sector 	= firstnm(naics2_event),		by(pairid year_prior)

* firm age -- rebuilt from the birth date recorded in the pre-event year
drop 	dateinc dateinc_year age
gen 	temp_dateinc 	= dofc(BirthDate) if year == year_prior
gegen	dateinc 		= firstnm(temp_dateinc), by(id)
gen 	dateinc_year 	= yofd(dateinc)
gen 	age 			= year - dateinc_year
* age polynomial terms age2, age3, age4
forvalues p = 2/4 {
	gen 	age`p' 			= age^`p'
}

* keep acquirer and deal-type flags only on the treated firm's pre-event row
foreach v of varlist Acquirer merger {
	replace `v' = . if treated != 1 | year != year_prior
}

* pair-level variables
gsort pairid year_prior -treated year
gegen matched_acq 		= firstnm(Acquirer), 			by(pairid year_prior)
gegen matched_deal_type 	= firstnm(merger),				by(pairid year_prior)

** drop pairs whose treated firm is in a financial sector (codes 52 and 55) **
drop if inlist(matched_sector, 52, 55)

compress
save $data/firm_matched_diff_market, replace